In [ ]:
## Install libraries
!pip install xarray pystac_client geopandas dask "dask[dataframe]" planetary-computer odc-stac odc-geo mapclassify spyndex zarr
In [1]:
import xarray as xr
import pystac_client
import odc.stac
from odc.geo.geobox import GeoBox
from dask.diagnostics import ProgressBar
from dask.distributed import Client, LocalCluster
import geopandas as gpd
import planetary_computer as pc
import odc.geo.xr
import spyndex
import os
from xrspatial import hillshade, slope, aspect
In [2]:
## (Optional) GDAL config
env = dict(GDAL_DISABLE_READDIR_ON_OPEN='EMPTY_DIR', 
           AWS_NO_SIGN_REQUEST='YES',
           GDAL_MAX_RAW_BLOCK_CACHE_SIZE='200000000',
           GDAL_SWATH_SIZE='200000000',
           VSI_CURL_CACHE_SIZE='200000000')
os.environ.update(env)
In [3]:
## Start Dask Client
client = Client(processes=True, n_workers=6)
client
Out[3]:

Client

Client-1855b170-d293-11ef-ab3b-424bb805562b

Connection method: Cluster object Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status

Cluster Info

LocalCluster

79af5e38

Dashboard: http://127.0.0.1:8787/status Workers: 6
Total threads: 12 Total memory: 36.00 GiB
Status: running Using processes: True

Scheduler Info

Scheduler

Scheduler-07008db2-dc2f-4b80-b163-572c7365c6c1

Comm: tcp://127.0.0.1:55292 Workers: 6
Dashboard: http://127.0.0.1:8787/status Total threads: 12
Started: Just now Total memory: 36.00 GiB

Workers

Worker: 0

Comm: tcp://127.0.0.1:55313 Total threads: 2
Dashboard: http://127.0.0.1:55315/status Memory: 6.00 GiB
Nanny: tcp://127.0.0.1:55295
Local directory: /var/folders/7x/v47p0s4s3bg3ytckj1t34h180000gn/T/dask-scratch-space/worker-rctg3f6o

Worker: 1

Comm: tcp://127.0.0.1:55314 Total threads: 2
Dashboard: http://127.0.0.1:55318/status Memory: 6.00 GiB
Nanny: tcp://127.0.0.1:55296
Local directory: /var/folders/7x/v47p0s4s3bg3ytckj1t34h180000gn/T/dask-scratch-space/worker-5oqzsvnd

Worker: 2

Comm: tcp://127.0.0.1:55320 Total threads: 2
Dashboard: http://127.0.0.1:55323/status Memory: 6.00 GiB
Nanny: tcp://127.0.0.1:55297
Local directory: /var/folders/7x/v47p0s4s3bg3ytckj1t34h180000gn/T/dask-scratch-space/worker-gzut5cgm

Worker: 3

Comm: tcp://127.0.0.1:55307 Total threads: 2
Dashboard: http://127.0.0.1:55309/status Memory: 6.00 GiB
Nanny: tcp://127.0.0.1:55298
Local directory: /var/folders/7x/v47p0s4s3bg3ytckj1t34h180000gn/T/dask-scratch-space/worker-n69vpbyy

Worker: 4

Comm: tcp://127.0.0.1:55308 Total threads: 2
Dashboard: http://127.0.0.1:55311/status Memory: 6.00 GiB
Nanny: tcp://127.0.0.1:55299
Local directory: /var/folders/7x/v47p0s4s3bg3ytckj1t34h180000gn/T/dask-scratch-space/worker-5sek23vi

Worker: 5

Comm: tcp://127.0.0.1:55317 Total threads: 2
Dashboard: http://127.0.0.1:55321/status Memory: 6.00 GiB
Nanny: tcp://127.0.0.1:55300
Local directory: /var/folders/7x/v47p0s4s3bg3ytckj1t34h180000gn/T/dask-scratch-space/worker-djrt1ww7
In [4]:
## Import boundary vector data
sa_bounds = gpd.read_file("./data/sa_boundary.gpkg")
sa_bounds.explore()
Out[4]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [5]:
## Create GeoBox
dx = 1000
bbox_hart = sa_bounds.to_crs("EPSG:9221") 
geobox = GeoBox.from_bbox(bbox_hart.total_bounds, crs="EPSG:9221", resolution=dx)
geobox
Out[5]:

GeoBox

Dimensions
1,598x1,419
EPSG
9221
Resolution
1000m
Cell
200px
WKT
PROJCRS["Hartebeesthoek94 / ZAF BSU Albers 25E",
    BASEGEOGCRS["Hartebeesthoek94",
        DATUM["Hartebeesthoek94",
            ELLIPSOID["WGS 84",6378137,298.257223563,
                LENGTHUNIT["metre",1]]],
        PRIMEM["Greenwich",0,
            ANGLEUNIT["degree",0.0174532925199433]],
        ID["EPSG",4148]],
    CONVERSION["South Africa Basic Survey Unit Albers 25E",
        METHOD["Albers Equal Area",
            ID["EPSG",9822]],
        PARAMETER["Latitude of false origin",-30,
            ANGLEUNIT["degree",0.0174532925199433],
            ID["EPSG",8821]],
        PARAMETER["Longitude of false origin",25,
            ANGLEUNIT["degree",0.0174532925199433],
            ID["EPSG",8822]],
        PARAMETER["Latitude of 1st standard parallel",-22,
            ANGLEUNIT["degree",0.0174532925199433],
            ID["EPSG",8823]],
        PARAMETER["Latitude of 2nd standard parallel",-38,
            ANGLEUNIT["degree",0.0174532925199433],
            ID["EPSG",8824]],
        PARAMETER["Easting at false origin",1400000,
            LENGTHUNIT["metre",1],
            ID["EPSG",8826]],
        PARAMETER["Northing at false origin",1300000,
            LENGTHUNIT["metre",1],
            ID["EPSG",8827]]],
    CS[Cartesian,2],
        AXIS["northing (N)",north,
            ORDER[1],
            LENGTHUNIT["metre",1]],
        AXIS["easting (E)",east,
            ORDER[2],
            LENGTHUNIT["metre",1]],
    USAGE[
        SCOPE["Spatial analysis for the purposes of Natural Capital Accounting."],
        AREA["South Africa - mainland - onshore and offshore."],
        BBOX[-38.17,13.33,-22.13,36.54]],
    ID["EPSG",9221]]
In [6]:
## Get Sentinel-2 data

#Set parameters
stac_client = 'https://planetarycomputer.microsoft.com/api/stac/v1'
bbox = list(sa_bounds.total_bounds)
time = "2024-03-01/2024-03-31"

# Search data 
items = pystac_client.Client.open(
    stac_client
    ).search(
        bbox=bbox,
        collections=['sentinel-2-l2a'],
        datetime=time
    ).item_collection()

#Load Data
ds_odc = odc.stac.load(
    items,
    bands=["red", "green", "blue", "nir", "SCL"],
    chunks={'time': 1, 'x': 100, 'y': 100},
    geobox=geobox,
    resampling="bilinear",
    patch_url=pc.sign
)
In [7]:
## Create Median Composite
def is_valid_pixel(data):
    return ((data > 3) & (data < 7)) | (data==11)
ds_odc['valid'] = is_valid_pixel(ds_odc.SCL)
ds_median = ds_odc.where(ds_odc.valid).median(dim="time")
In [8]:
## Get Digital Elevation Model
# Search data
items = pystac_client.Client.open(
    stac_client
    ).search(
        bbox=bbox,
        collections=["nasadem"],
    ).item_collection()

# Load data
dem_odc = odc.stac.load(
    items,
    bands=["elevation"],
    chunks={'time': 1, 'x': 100, 'y': 100},
    geobox=geobox,
    resampling="bilinear",
    patch_url=pc.sign
)
In [ ]:
## Combine datasets
ds_median["elev"] = dem_odc.isel(time = 0).elevation
In [9]:
## Execute 
data = ds_median.compute()
/Users/stevenhill/micromamba/envs/stac/lib/python3.10/site-packages/distributed/client.py:3371: UserWarning: Sending large graph of size 77.41 MiB.
This may cause some slowdown.
Consider loading the data with Dask directly
 or using futures or delayed objects to embed the data into the graph without repetition.
See also https://docs.dask.org/en/stable/best-practices.html#load-data-with-dask for more information.
  warnings.warn(
/Users/stevenhill/micromamba/envs/stac/lib/python3.10/site-packages/rasterio/warp.py:387: NotGeoreferencedWarning: Dataset has no geotransform, gcps, or rpcs. The identity matrix will be returned.
  dest = _reproject(
/Users/stevenhill/micromamba/envs/stac/lib/python3.10/site-packages/rasterio/warp.py:387: NotGeoreferencedWarning: Dataset has no geotransform, gcps, or rpcs. The identity matrix will be returned.
  dest = _reproject(
/Users/stevenhill/micromamba/envs/stac/lib/python3.10/site-packages/rasterio/warp.py:387: NotGeoreferencedWarning: Dataset has no geotransform, gcps, or rpcs. The identity matrix will be returned.
  dest = _reproject(
/Users/stevenhill/micromamba/envs/stac/lib/python3.10/site-packages/rasterio/warp.py:387: NotGeoreferencedWarning: Dataset has no geotransform, gcps, or rpcs. The identity matrix will be returned.
  dest = _reproject(
In [20]:
## Plot RGB data
data.odc.explore(bands=["red","green","blue"],vmax=4000, robust=True)
/Users/stevenhill/micromamba/envs/stac/lib/python3.10/site-packages/odc/geo/_rgba.py:56: RuntimeWarning: invalid value encountered in cast
  return x.astype("uint8")
Out[20]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [22]:
## Compute different spectral indices
data["NDVI"] = spyndex.computeIndex(
    index=["NDVI"],
    params={
        "N": data.nir,
        "R": data.red
    }
)

data["EVI"] = spyndex.computeIndex(
    index=["EVI"],
    params={
        "C1": spyndex.constants["C1"].value,
        "C2": spyndex.constants["C2"].value,
        "g": spyndex.constants["g"].value,
        "L": spyndex.constants["L"].value,
        "N": data.nir,
        "R": data.red,
        "B": data.blue,
    },
)

data["GNDVI"] = spyndex.computeIndex(
    index=["GNDVI"],
    params={
        "N": data.nir,
        "G": data.green
    }
)
In [23]:
## Compute topographic indices 
data["hillshade"] = hillshade(data["elev"])
data["aspect"] = aspect(data["elev"])
data["slope"] = slope(data["elev"])
In [24]:
data = data.compute()
In [25]:
## Plot NDVI
data.NDVI.odc.explore()
Out[25]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [26]:
## Plot Slope
data.slope.odc.explore()
Out[26]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [27]:
## Save dataset
data.to_zarr("./data/dataset.zarr", write_empty_chunks=False)
Out[27]:
<xarray.backends.zarr.ZarrStore at 0x3b7ef7b50>
In [28]:
## Import dataset
data = xr.open_zarr("./data/dataset.zarr")
In [29]:
## Create random sample points
sample_points = sa_bounds.sample_points(1000).to_crs("EPSG:9221")
coords = sample_points.get_coordinates()
In [30]:
## Extract all variables
x = coords.x.to_list()
y = coords.y.to_list()
values_at_pts = data.sel(x=x, y=y, method="nearest")
In [31]:
values_at_pts
Out[31]:
<xarray.Dataset> Size: 56MB
Dimensions:      (y: 1000, x: 1000)
Coordinates:
    time         datetime64[ns] 8B ...
  * x            (x) float64 8kB 5.875e+05 6.165e+05 ... 2.138e+06 2.162e+06
  * y            (y) float64 8kB 1.418e+06 1.39e+06 ... 1.536e+06 1.592e+06
Data variables: (12/14)
    EVI          (y, x) float32 4MB dask.array<chunksize=(354, 399), meta=np.ndarray>
    GNDVI        (y, x) float32 4MB dask.array<chunksize=(354, 399), meta=np.ndarray>
    NDVI         (y, x) float32 4MB dask.array<chunksize=(354, 399), meta=np.ndarray>
    SCL          (y, x) float32 4MB dask.array<chunksize=(354, 399), meta=np.ndarray>
    aspect       (y, x) float32 4MB dask.array<chunksize=(354, 399), meta=np.ndarray>
    blue         (y, x) float32 4MB dask.array<chunksize=(354, 399), meta=np.ndarray>
    ...           ...
    hillshade    (y, x) float32 4MB dask.array<chunksize=(354, 399), meta=np.ndarray>
    nir          (y, x) float32 4MB dask.array<chunksize=(354, 399), meta=np.ndarray>
    red          (y, x) float32 4MB dask.array<chunksize=(354, 399), meta=np.ndarray>
    slope        (y, x) float32 4MB dask.array<chunksize=(354, 399), meta=np.ndarray>
    spatial_ref  int32 4B ...
    valid        (y, x) float64 8MB dask.array<chunksize=(177, 399), meta=np.ndarray>
xarray.Dataset
    • y: 1000
    • x: 1000
    • time
      ()
      datetime64[ns]
      ...
      [1 values with dtype=datetime64[ns]]
    • x
      (x)
      float64
      5.875e+05 6.165e+05 ... 2.162e+06
      crs :
      EPSG:9221
      resolution :
      1000.0
      units :
      metre
      array([ 587500.,  616500.,  620500., ..., 2125500., 2137500., 2161500.])
    • y
      (y)
      float64
      1.418e+06 1.39e+06 ... 1.592e+06
      crs :
      EPSG:9221
      resolution :
      -1000.0
      units :
      metre
      array([1417500., 1390500., 1448500., ..., 1536500., 1536500., 1591500.])
    • EVI
      (y, x)
      float32
      dask.array<chunksize=(354, 399), meta=np.ndarray>
      Array Chunk
      Bytes 3.81 MiB 551.74 kiB
      Shape (1000, 1000) (354, 399)
      Dask graph 9 chunks in 4 graph layers
      Data type float32 numpy.ndarray
      1000 1000
    • GNDVI
      (y, x)
      float32
      dask.array<chunksize=(354, 399), meta=np.ndarray>
      Array Chunk
      Bytes 3.81 MiB 551.74 kiB
      Shape (1000, 1000) (354, 399)
      Dask graph 9 chunks in 4 graph layers
      Data type float32 numpy.ndarray
      1000 1000
    • NDVI
      (y, x)
      float32
      dask.array<chunksize=(354, 399), meta=np.ndarray>
      Array Chunk
      Bytes 3.81 MiB 551.74 kiB
      Shape (1000, 1000) (354, 399)
      Dask graph 9 chunks in 4 graph layers
      Data type float32 numpy.ndarray
      1000 1000
    • SCL
      (y, x)
      float32
      dask.array<chunksize=(354, 399), meta=np.ndarray>
      Array Chunk
      Bytes 3.81 MiB 551.74 kiB
      Shape (1000, 1000) (354, 399)
      Dask graph 9 chunks in 4 graph layers
      Data type float32 numpy.ndarray
      1000 1000
    • aspect
      (y, x)
      float32
      dask.array<chunksize=(354, 399), meta=np.ndarray>
      Array Chunk
      Bytes 3.81 MiB 551.74 kiB
      Shape (1000, 1000) (354, 399)
      Dask graph 9 chunks in 4 graph layers
      Data type float32 numpy.ndarray
      1000 1000
    • blue
      (y, x)
      float32
      dask.array<chunksize=(354, 399), meta=np.ndarray>
      Array Chunk
      Bytes 3.81 MiB 551.74 kiB
      Shape (1000, 1000) (354, 399)
      Dask graph 9 chunks in 4 graph layers
      Data type float32 numpy.ndarray
      1000 1000
    • elev
      (y, x)
      float32
      dask.array<chunksize=(354, 399), meta=np.ndarray>
      grid_mapping :
      spatial_ref
      Array Chunk
      Bytes 3.81 MiB 551.74 kiB
      Shape (1000, 1000) (354, 399)
      Dask graph 9 chunks in 4 graph layers
      Data type float32 numpy.ndarray
      1000 1000
    • green
      (y, x)
      float32
      dask.array<chunksize=(354, 399), meta=np.ndarray>
      Array Chunk
      Bytes 3.81 MiB 551.74 kiB
      Shape (1000, 1000) (354, 399)
      Dask graph 9 chunks in 4 graph layers
      Data type float32 numpy.ndarray
      1000 1000
    • hillshade
      (y, x)
      float32
      dask.array<chunksize=(354, 399), meta=np.ndarray>
      Array Chunk
      Bytes 3.81 MiB 551.74 kiB
      Shape (1000, 1000) (354, 399)
      Dask graph 9 chunks in 4 graph layers
      Data type float32 numpy.ndarray
      1000 1000
    • nir
      (y, x)
      float32
      dask.array<chunksize=(354, 399), meta=np.ndarray>
      Array Chunk
      Bytes 3.81 MiB 551.74 kiB
      Shape (1000, 1000) (354, 399)
      Dask graph 9 chunks in 4 graph layers
      Data type float32 numpy.ndarray
      1000 1000
    • red
      (y, x)
      float32
      dask.array<chunksize=(354, 399), meta=np.ndarray>
      Array Chunk
      Bytes 3.81 MiB 551.74 kiB
      Shape (1000, 1000) (354, 399)
      Dask graph 9 chunks in 4 graph layers
      Data type float32 numpy.ndarray
      1000 1000
    • slope
      (y, x)
      float32
      dask.array<chunksize=(354, 399), meta=np.ndarray>
      Array Chunk
      Bytes 3.81 MiB 551.74 kiB
      Shape (1000, 1000) (354, 399)
      Dask graph 9 chunks in 4 graph layers
      Data type float32 numpy.ndarray
      1000 1000
    • spatial_ref
      ()
      int32
      ...
      GeoTransform :
      574000 1000 0 2169000 0 -1000
      crs_wkt :
      PROJCRS["Hartebeesthoek94 / ZAF BSU Albers 25E",BASEGEOGCRS["Hartebeesthoek94",DATUM["Hartebeesthoek94",ELLIPSOID["WGS 84",6378137,298.257223563,LENGTHUNIT["metre",1]]],PRIMEM["Greenwich",0,ANGLEUNIT["degree",0.0174532925199433]],ID["EPSG",4148]],CONVERSION["South Africa Basic Survey Unit Albers 25E",METHOD["Albers Equal Area",ID["EPSG",9822]],PARAMETER["Latitude of false origin",-30,ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8821]],PARAMETER["Longitude of false origin",25,ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8822]],PARAMETER["Latitude of 1st standard parallel",-22,ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8823]],PARAMETER["Latitude of 2nd standard parallel",-38,ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8824]],PARAMETER["Easting at false origin",1400000,LENGTHUNIT["metre",1],ID["EPSG",8826]],PARAMETER["Northing at false origin",1300000,LENGTHUNIT["metre",1],ID["EPSG",8827]]],CS[Cartesian,2],AXIS["northing (N)",north,ORDER[1],LENGTHUNIT["metre",1]],AXIS["easting (E)",east,ORDER[2],LENGTHUNIT["metre",1]],USAGE[SCOPE["Spatial analysis for the purposes of Natural Capital Accounting."],AREA["South Africa - mainland - onshore and offshore."],BBOX[-38.17,13.33,-22.13,36.54]],ID["EPSG",9221]]
      false_easting :
      1400000.0
      false_northing :
      1300000.0
      geographic_crs_name :
      Hartebeesthoek94
      grid_mapping_name :
      albers_conical_equal_area
      horizontal_datum_name :
      Hartebeesthoek94
      inverse_flattening :
      298.257223563
      latitude_of_projection_origin :
      -30.0
      longitude_of_central_meridian :
      25.0
      longitude_of_prime_meridian :
      0.0
      prime_meridian_name :
      Greenwich
      projected_crs_name :
      Hartebeesthoek94 / ZAF BSU Albers 25E
      reference_ellipsoid_name :
      WGS 84
      semi_major_axis :
      6378137.0
      semi_minor_axis :
      6356752.314245179
      spatial_ref :
      PROJCRS["Hartebeesthoek94 / ZAF BSU Albers 25E",BASEGEOGCRS["Hartebeesthoek94",DATUM["Hartebeesthoek94",ELLIPSOID["WGS 84",6378137,298.257223563,LENGTHUNIT["metre",1]]],PRIMEM["Greenwich",0,ANGLEUNIT["degree",0.0174532925199433]],ID["EPSG",4148]],CONVERSION["South Africa Basic Survey Unit Albers 25E",METHOD["Albers Equal Area",ID["EPSG",9822]],PARAMETER["Latitude of false origin",-30,ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8821]],PARAMETER["Longitude of false origin",25,ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8822]],PARAMETER["Latitude of 1st standard parallel",-22,ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8823]],PARAMETER["Latitude of 2nd standard parallel",-38,ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8824]],PARAMETER["Easting at false origin",1400000,LENGTHUNIT["metre",1],ID["EPSG",8826]],PARAMETER["Northing at false origin",1300000,LENGTHUNIT["metre",1],ID["EPSG",8827]]],CS[Cartesian,2],AXIS["northing (N)",north,ORDER[1],LENGTHUNIT["metre",1]],AXIS["easting (E)",east,ORDER[2],LENGTHUNIT["metre",1]],USAGE[SCOPE["Spatial analysis for the purposes of Natural Capital Accounting."],AREA["South Africa - mainland - onshore and offshore."],BBOX[-38.17,13.33,-22.13,36.54]],ID["EPSG",9221]]
      standard_parallel :
      [-22.0, -38.0]
      [1 values with dtype=int32]
    • valid
      (y, x)
      float64
      dask.array<chunksize=(177, 399), meta=np.ndarray>
      Array Chunk
      Bytes 7.63 MiB 551.74 kiB
      Shape (1000, 1000) (177, 399)
      Dask graph 18 chunks in 4 graph layers
      Data type float64 numpy.ndarray
      1000 1000
    • x
      PandasIndex
      PandasIndex(Index([ 587500.0,  616500.0,  620500.0,  634500.0,  634500.0,  652500.0,
              662500.0,  654500.0,  663500.0,  674500.0,
             ...
             2092500.0, 2096500.0, 2097500.0, 2100500.0, 2109500.0, 2111500.0,
             2114500.0, 2125500.0, 2137500.0, 2161500.0],
            dtype='float64', name='x', length=1000))
    • y
      PandasIndex
      PandasIndex(Index([1417500.0, 1390500.0, 1448500.0, 1436500.0, 1468500.0, 1322500.0,
             1254500.0, 1416500.0, 1306500.0, 1303500.0,
             ...
             1711500.0, 1676500.0, 1681500.0, 1478500.0, 1533500.0, 1469500.0,
             1503500.0, 1536500.0, 1536500.0, 1591500.0],
            dtype='float64', name='y', length=1000))